/* ==================================================================== 
 Licensed to the Apache Software Foundation (ASF) under one or more 
 contributor license agreements.  See the NOTICE file distributed with 
 this work for additional information regarding copyright ownership. 
 The ASF licenses this file to You under the Apache License, Version 2.0 
 (the "License"); you may not use this file except in compliance with 
 the License.  You may obtain a copy of the License at 

 http://www.apache.org/licenses/LICENSE-2.0 

 Unless required by applicable law or agreed to in writing, software 
 distributed under the License is distributed on an "AS IS" BASIS, 
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
 See the License for the specific language governing permissions and 
 limitations under the License. 
 ==================================================================== */

package org.apache.poi.ext;

import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.ss.usermodel.BuiltinFormats;
import org.apache.poi.ss.usermodel.DataFormatter;
import org.apache.poi.ss.usermodel.DateUtil;
import org.apache.poi.xssf.eventusermodel.ReadOnlySharedStringsTable;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.StylesTable;
import org.apache.poi.xssf.usermodel.XSSFCellStyle;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.Attributes;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/**
 * 使用CVS模式解决XLSX文件，可以有效解决用户模式内存溢出的问题
 * 该模式是POI官方推荐的读取大数据的模式，在用户模式下，数据量较大、Sheet较多、或者是有很多无用的空行的情况
 * ，容易出现内存溢出,用户模式读取Excel的典型代码如下： <br>
 * FileInputStream in=new FileInputStream("c:\\test.xlsx"); <br>
 * Workbook wb=new XSSFWorkbook(in); <br>
 * 或者 Workbook workbook = WorkbookFactory.create(in); 线程安全的实现
 * 
 * @author ice1938@qqq.com
 */
public abstract class AbstractXlsxProcess implements ProcessExcel {
	/**
	 * Logger for this class
	 */
	private static final Logger logger = LoggerFactory.getLogger(AbstractXlsxProcess.class);

	/**
	 * The type of the data value is indicated by an attribute on the cell. The
	 * value is usually in a "v" element within the cell.
	 */
	enum xssfDataType {
		BOOL, ERROR, FORMULA, INLINESTR, SSTINDEX, NUMBER,
	}

	/**
	 * 使用xssf_sax_API处理Excel,请参考：
	 * http://poi.apache.org/spreadsheet/how-to.html#xssf_sax_api
	 * <p/>
	 * Also see Standard ECMA-376, 1st edition, part 4, pages 1928ff, at
	 * http://www.ecma-international.org/publications/standards/Ecma-376.htm
	 * <p/>
	 * A web-friendly version is http://openiso.org/Ecma/376/Part4
	 */
	class XSSFSheetXMLHandler extends DefaultHandler {

		/**
		 * Table with styles
		 */
		private StylesTable stylesTable;

		/**
		 * Table with unique strings
		 */
		private ReadOnlySharedStringsTable sharedStringsTable;

		// Set when V start element is seen
		private boolean vIsOpen;

		// Set when cell start element is seen;
		// used when cell close element is seen.
		private xssfDataType nextDataType;

		// Used to format numeric cell values.
		private short formatIndex;
		private String formatString;
		private final DataFormatter formatter;

		private int thisColumn = -1;
		// The last column printed to the output stream
		private int lastColumnNumber = -1;

		// Gathers characters as they are seen.
		private StringBuffer value;
		private List<String> record;
		private List<List<String>> rows = new ArrayList<List<String>>();
		private boolean isCellNull = false;

		private int sheetIndex;

		private int rowNum;

		private int nextRowNum;

		/**
		 * Accepts objects needed while parsing.
		 * 
		 * @param styles
		 *            Table of styles
		 * @param strings
		 *            Table of shared strings
		 * @param sheetIndex
		 * @param cols
		 *            Minimum number of columns to show
		 * @param target
		 *            Sink for output
		 */
		public XSSFSheetXMLHandler(StylesTable styles, ReadOnlySharedStringsTable strings, int sheetIndex) {
			this.stylesTable = styles;
			this.sharedStringsTable = strings;
			this.value = new StringBuffer();
			this.nextDataType = xssfDataType.NUMBER;
			this.formatter = new DataFormatter();
			this.sheetIndex = sheetIndex;
			record = new ArrayList<String>();
			rows.clear();// 每次读取都清空行集合
		}

		/*
		 * (non-Javadoc)
		 * 
		 * @see
		 * org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String,
		 * java.lang.String, java.lang.String, org.xml.sax.Attributes)
		 */
		@Override
        public void startElement(String uri, String localName, String name, Attributes attributes) throws SAXException {

			if ("inlineStr".equals(name) || "v".equals(name)) {
				vIsOpen = true;
				// Clear contents cache
				value.setLength(0);
			}
			// c => cell
			else if ("c".equals(name)) {
				// Get the cell reference
				String r = attributes.getValue("r");
				int firstDigit = -1;
				for (int c = 0; c < r.length(); ++c) {
					if (Character.isDigit(r.charAt(c))) {
						firstDigit = c;
						break;
					}
				}
				thisColumn = nameToColumn(r.substring(0, firstDigit));

				// Set up defaults.
				this.nextDataType = xssfDataType.NUMBER;
				this.formatIndex = -1;
				this.formatString = null;
				String cellType = attributes.getValue("t");
				String cellStyleStr = attributes.getValue("s");
				if ("b".equals(cellType))
					nextDataType = xssfDataType.BOOL;
				else if ("e".equals(cellType))
					nextDataType = xssfDataType.ERROR;
				else if ("inlineStr".equals(cellType))
					nextDataType = xssfDataType.INLINESTR;
				else if ("s".equals(cellType))
					nextDataType = xssfDataType.SSTINDEX;
				else if ("str".equals(cellType))
					nextDataType = xssfDataType.FORMULA;
				else if (cellStyleStr != null) {
					// It's a number, but almost certainly one
					// with a special style or format
					int styleIndex = Integer.parseInt(cellStyleStr);
					XSSFCellStyle style = stylesTable.getStyleAt(styleIndex);
					this.formatIndex = style.getDataFormat();
					this.formatString = style.getDataFormatString();
					if (this.formatString == null)
						this.formatString = BuiltinFormats.getBuiltinFormat(this.formatIndex);
				}
			} else if ("row".equals(name)) {
				String rowNumStr = attributes.getValue("r");
				if (rowNumStr != null) {
					rowNum = Integer.parseInt(rowNumStr) - 1;
				} else {
					rowNum = nextRowNum;
				}
			}

		}

		/*
		 * (non-Javadoc)
		 * 
		 * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String,
		 * java.lang.String, java.lang.String)
		 */
		@Override
        public void endElement(String uri, String localName, String name) throws SAXException {

			String thisStr = null;

			// v => contents of a cell
			if ("v".equals(name)) {
				// Process the value contents as required.
				// Do now, as characters() may be called more than once
				switch (nextDataType) {

				case BOOL:
					char first = value.charAt(0);
					thisStr = first == '0' ? "FALSE" : "TRUE";
					break;

				case ERROR:
					thisStr = "\"ERROR:" + value.toString() + '"';
					break;

				case FORMULA:
					// A formula could result in a string value,
					// so always add double-quote characters.
					// thisStr = '"' + value.toString() + '"';
					thisStr = value.toString();
					break;

				case INLINESTR:
					// TODO: have seen an example of this, so it's untested.
					XSSFRichTextString rtsi = new XSSFRichTextString(value.toString());
					// thisStr = '"' + rtsi.toString() + '"';
					thisStr = rtsi.toString();
					break;

				case SSTINDEX:
					String sstIndex = value.toString();
					try {
						int idx = Integer.parseInt(sstIndex);
						XSSFRichTextString rtss = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx));
						// thisStr = '"' + rtss.toString() + '"';
						thisStr = rtss.toString();
					} catch (NumberFormatException ex) {
						logger.error("Failed to parse SST index '" + sstIndex + "': " + ex.toString(), ex); //$NON-NLS-1$

					}
					break;

				case NUMBER:
					String n = value.toString();
					// 判断是否是日期格式
					if (DateUtil.isADateFormat(this.formatIndex, n)) {
						Double d = Double.parseDouble(n);
						Date date = DateUtil.getJavaDate(d);
						thisStr = formateDateToString(date);
					} else if (this.formatString != null)
						thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex,
						        this.formatString);
					else
						thisStr = n;
					break;

				default:
					thisStr = "(TODO: Unexpected type: " + nextDataType + ")";
					break;
				}

				// Output after we've seen the string contents
				// Emit commas for any fields that were missing on this row
				if (lastColumnNumber == -1) {
					lastColumnNumber = 0;
				}
				// 判断单元格的值是否为空
				if (thisStr == null || "".equals(isCellNull)) {
					isCellNull = true;// 设置单元格是否为空值
				}
				record.add(thisStr);
				// Update column
				if (thisColumn > -1)
					lastColumnNumber = thisColumn;

			} else if ("row".equals(name)) {

				if (isCellNull == false) {// 判断是否空行
					List<String> t = new ArrayList<String>();
					t.addAll(record);
					optRows(this.sheetIndex, rowNum, record);
					rows.add(t);
					isCellNull = false;
					record.clear();
				}
				lastColumnNumber = -1;
				nextRowNum = rowNum + 1;
			}
		}

		public List<List<String>> getRows() {
			return rows;
		}

		/**
		 * Captures characters only if a suitable element is open. Originally
		 * was just "v"; extended for inlineStr also.
		 */
		@Override
        public void characters(char[] ch, int start, int length) throws SAXException {
			if (vIsOpen)
				value.append(ch, start, length);
		}

		/**
		 * Converts an Excel column name like "C" to a zero-based index.
		 * 
		 * @param name
		 * @return Index corresponding to the specified name
		 */
		private int nameToColumn(String name) {
			int column = -1;
			for (int i = 0; i < name.length(); ++i) {
				int c = name.charAt(i);
				column = (column + 1) * 26 + c - 'A';
			}
			return column;
		}

		private String formateDateToString(Date date) {
			SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");// 格式化日期
			return sdf.format(date);

		}

	}

	// /////////////////////////////////////

	;

	/**
	 * Parses and shows the content of one sheet using the specified styles and
	 * shared-strings tables.
	 * 
	 * @param styles
	 * @param strings
	 * @param sheetInputStream
	 * @param sheetIndex
	 */
	private List<List<String>> processSheet(StylesTable styles, ReadOnlySharedStringsTable strings,
	        InputStream sheetInputStream, int sheetIndex) throws IOException, ParserConfigurationException,
	        SAXException {

		InputSource sheetSource = new InputSource(sheetInputStream);
		SAXParserFactory saxFactory = SAXParserFactory.newInstance();
		SAXParser saxParser = saxFactory.newSAXParser();
		XMLReader sheetParser = saxParser.getXMLReader();
		XSSFSheetXMLHandler handler = new XSSFSheetXMLHandler(styles, strings, sheetIndex);
		sheetParser.setContentHandler(handler);
		sheetParser.parse(sheetSource);
		return handler.getRows();
	}

	/**
	 * 初始化这个处理程序 将
	 * 
	 * @throws IOException IOException
	 * @throws OpenXML4JException OpenXML4JException
	 * @throws ParserConfigurationException ParserConfigurationException
	 * @throws SAXException SAXException
	 */
	@Override
	public List<List<String>> process(InputStream in, int... sheetIndex) throws IOException, OpenXML4JException,
	        ParserConfigurationException, SAXException {
		OPCPackage xlsxPackage = OPCPackage.open(in);
		ReadOnlySharedStringsTable strings = new ReadOnlySharedStringsTable(xlsxPackage);
		XSSFReader xssfReader = new XSSFReader(xlsxPackage);
		List<List<String>> list = null;
		StylesTable styles = xssfReader.getStylesTable();
		XSSFReader.SheetIterator iter = (XSSFReader.SheetIterator) xssfReader.getSheetsData();
		int index = 0;
		for (Iterator<InputStream> iterator = iter; iterator.hasNext();) {
			InputStream stream = iter.next();
			boolean continueFlag = false;
			if (sheetIndex.length > 0) {
				for (int i = 0; i < sheetIndex.length; i++) {
					int j = sheetIndex[i];
					if (index != j) {
						continueFlag = true;
						index++;
					}
				}
			}
			if (continueFlag) {
				continue;
			}
			list = processSheet(styles, strings, stream, index);
			stream.close();
			index++;
		}
		if (in != null) {
			in.close();
		}
		if (xlsxPackage != null) {
			xlsxPackage.close();
		}
		return list;
	}

}
